ohibc logo
OHI British Columbia | OHI Science | Citation policy

knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'figs/',
                      echo = TRUE, message = FALSE, warning = FALSE)

library(raster)
library(data.table)

source('https://raw.githubusercontent.com/oharac/src/master/R/common.R')
  ### includes library(tidyverse); library(stringr); 
  ### dir_M points to ohi directory on Mazu; dir_O points to home dir on Mazu

dir_git <- '~/github/spp_risk_dists'

### goal specific folders and info
dir_setup <- file.path(dir_git, 'data_setup')
dir_data  <- file.path(dir_git, 'data')
dir_o_anx <- file.path(dir_O, 'git-annex/spp_risk_dists')

source(file.path(dir_git, 'data_setup/api_fxns.R'))

1 Summary

Having selected some cells from regions of interest, explore these by mapping and comparing values. Identify species found within each cell.

1.1 Identify species groups

Using the summary files, identify the species groups present in each cell of interest.

Then get the processed rasters to get values for n_spp, risk, variance, and rr-weighted risk/var. Also get eez values and depths.

cells_of_interest <- readxl::read_excel(file.path(dir_git, 'data_explore', 'cells_to_explore.xlsx'))

cell_summary_file <- file.path(dir_git, 'data_explore', 'cell_sums_of_interest.csv')

reload <- FALSE

if(!file.exists(cell_summary_file) | reload == TRUE) {
  dir_taxa_summaries <- file.path(dir_o_anx, 'taxa_summaries')
  sum_files <- list.files(dir_taxa_summaries,
                          pattern = sprintf('cell_sum_%s.csv', api_version),
                          full.names = TRUE)
  message('reading summary files')
  ptm <- system.time({
    cell_values_all <- parallel::mclapply(sum_files, mc.cores = 32,
                                          FUN = function(x) { ### x <- sum_files[1]
                                            read_csv(x, col_types = 'dddiidi') %>%
                                              mutate(spp_gp = basename(x) %>% 
                                                       str_replace('_cell_sum.+', ''))
                                            }) %>%
      bind_rows()
  }) ### end of system.time
  message('... processing time ', ptm[3], ' sec')
  
  cells_of_interest_sum <- cell_values_all %>%
    filter(cell_id %in% cells_of_interest$cell_id) %>%
    select(cell_id, n_spp_gp = n_spp_risk, spp_gp) %>%
    left_join(cells_of_interest, by = 'cell_id') %>%
    arrange(cell_group, cell_id)
  
  cell_df <- data.frame(cell_id = values(raster(file.path(dir_git, 'output', 
                                                          'cell_id_raster.tif'))),
                        n_spp   = values(raster(file.path(dir_git, 'output',
                                                          'n_spp_risk_raster_010deg.tif'))),
                        risk    = values(raster(file.path(dir_git, 'output',
                                                          'mean_risk_raster_010deg.tif'))),
                        rr_risk = values(raster(file.path(dir_git, 'output', 
                                                          'mean_rr_risk_raster_010deg.tif'))),
                        rr_var  = values(raster(file.path(dir_git, 'output', 
                                                          'var_rr_risk_raster_010deg.tif'))),
                        eez_id  = values(raster(file.path(dir_git, 'spatial', 
                                                          'eez_rast_010_wgs84.tif'))),
                        lme_id  = values(raster(file.path(dir_git, 'spatial', 
                                                          'lme_rast_010_wgs84.tif'))),
                        depth   = values(raster(file.path(dir_git, 'spatial', 
                                                          'bathy_rast_010_wgs84.tif')))) %>%
    filter(cell_id %in% cells_of_interest$cell_id)
  
  cells_info <- cells_of_interest_sum %>%
    full_join(cell_df, by = 'cell_id')
  
  write_csv(cells_info, cell_summary_file)
}

1.2 Get all the species in each cell using the original raster extracts

cells_info <- read_csv(cell_summary_file)

# spp_gps_all <- cells_info$spp_gp %>% unique()
  ### all are represented here...

spp_maps <- read_csv(file.path(dir_data, sprintf('spp_marine_maps_%s.csv', api_version)),
                     col_types = 'ddciccc')

taxa_cells_file <- file.path(dir_git, 'data_explore', 'taxa_spp_cells.csv')

reload <- FALSE

if(!file.exists(taxa_cells_file) | reload == TRUE) {
  ### Make a list of taxonomic groups to loop over:
  taxa <- spp_maps$dbf_file %>%
    unique() %>%
    str_replace('\\....$', '')
  
  taxa_cells_list <- vector('list', length = length(taxa))
  
  for(i in seq_along(taxa)) { ### i <- 5
    taxon <- taxa[i]
    
    spp_ids_in_taxon <- spp_maps %>%
      filter(str_detect(dbf_file, taxon)) %>%
      .$iucn_sid
    cat(sprintf('processing %s spp in %s...\n', length(spp_ids_in_taxon), taxon))
    
    spp_cells <- parallel::mclapply(spp_ids_in_taxon, mc.cores = 32,
      FUN = function(x) { ### x <- spp_ids_in_taxon[1]
        f <- file.path(dir_o_anx, 'spp_rasters',
                       sprintf('iucn_sid_%s_010deg.csv', x))
        if(file.exists(f)) {
          y <- read_csv(f, col_types = 'di') %>%
            mutate(iucn_sid = x) %>%
            select(-presence)  %>%
            filter(cell_id %in% cells_info$cell_id)
        } else {
          y <- data.frame(cell_id = NA,
                          iucn_sid = x, 
                          f = f, error = 'file not found')
        }
        return(y)
      }) %>%
      bind_rows() %>%
      mutate(spp_gp = taxon)
    
    taxa_cells_list[[i]] <- spp_cells
  }
  
  taxa_cells_df <- taxa_cells_list %>%
    bind_rows()  %>%
    filter(!is.na(cell_id)) %>%
    select(cell_id, iucn_sid, spp_gp) %>%
    left_join(spp_maps %>% select(iucn_sid, max_depth), by = 'iucn_sid') %>%
    distinct() 
  
  write_csv(taxa_cells_df, taxa_cells_file)
}
spp_risk <- read_csv(file.path(dir_data, sprintf('iucn_risk_current_%s.csv', api_version)),
                     col_types = 'dc_____d_') %>%
  distinct()

spp_risk_rgn <- read_csv(file.path(dir_data, sprintf('iucn_risk_rgn_current_%s.csv', api_version)),
                         col_types = 'dc_cd_') %>%
  distinct()

spp_ranges   <- read_csv(file.path(dir_data, sprintf('iucn_spp_range_area_%s.csv', api_version)),
                         col_types = 'dd__') %>%
  distinct()

### make a dataframe of species risk and regional risk
spp_risk_all <- spp_risk %>%
  mutate(iucn_rgn = 'global') %>%
  bind_rows(spp_risk_rgn) %>%
  left_join(spp_ranges, by = c('iucn_sid')) %>%
  select(iucn_sid, sciname, iucn_rgn, cat_score, range_km2) %>%
  mutate(range_km2 = round(range_km2))

cells_info <- read_csv(cell_summary_file, col_types = 'd__cd____id') %>%
  distinct()

lme_to_rgn <- read_csv(file.path(dir_git, 'spatial/iucn_rgn_to_lme.csv')) %>%
  rename(rgn_name = iucn_rgn) %>%
  distinct()

taxa_cells_info <- read_csv(taxa_cells_file, col_types = 'ddcc') %>%
  left_join(spp_risk_all, by = 'iucn_sid') %>%
  mutate(spp_gp = tolower(spp_gp)) %>%
  left_join(cells_info, by = c('cell_id')) %>%
    ### add info from rasters, including lme_id
  left_join(lme_to_rgn, by = c('lme_id')) %>%
    ### add region names to filter out regional assessments
  mutate(rgn_name = ifelse(is.na(lme_id), 'global', rgn_name),
         priority = ifelse(rgn_name == 'global', 100, priority)) %>%
    ### LME cells already tagged 'global'; fix non-LME cells  and set low-ranking priority
  filter(iucn_rgn == rgn_name) %>%
  group_by(cell_id, iucn_sid) %>%
  filter(priority == min(priority)) %>%
    ### for each spp in each cell, choose the obs with highest-ranking priority
  ungroup()

spp_cells_info <- taxa_cells_info %>%
  select(cell_id, iucn_sid, sciname, spp_gp, iucn_rgn, 
         cat_score, range_km2, cell_group, depth, spp_max_depth = max_depth)

write_csv(spp_cells_info, file.path(dir_git, 'data_explore', 'spp_cells_scores.csv'))

1.3 Display points on a map and show the species list

For each cell group, plot a map of range-rarity-weighted risk; display the cells for that region on the map;

spp_cells_info <- read_csv(file.path(dir_git, 'data_explore', 'spp_cells_scores.csv'),
                           col_types = 'ddcccddcdc') %>%
  arrange(cell_group)

land_poly <- sf::read_sf(file.path(dir_git, 'spatial/ne_10m_land/ne_10m_land.shp'))


cell_id_file <- file.path(dir_git, 'data_explore/cell_id_latlong.csv')
if(!file.exists(cell_id_file)) {
  cell_id_df     <- raster(file.path(dir_git, 'output', 'cell_id_raster.tif')) %>%
    rasterToPoints() %>%
    as.data.frame() %>%
    setNames(c('x', 'y', 'cell_id')) %>%
    filter(cell_id %in% spp_cells_info$cell_id) %>%
    write_csv(cell_id_file)
} else {
  cell_id_df <- read_csv(cell_id_file)
}

fig_file <- file.path(dir_git, 'data_explore/figs',
                    sprintf('global_map_pts_of_interest.png'))

message(sprintf('Mapping cells for global cells of interest'))

### Attach lat/long values to cell IDs
groups_latlong <- spp_cells_info %>% 
  select(cell_id, cell_group) %>% 
  distinct() %>%
  left_join(cell_id_df, by = 'cell_id') %>%
  group_by(cell_group) %>%
  summarize(x = mean(x), y = mean(y))
  
if(!file.exists(fig_file)) {
  ### create the image and save it
  
  if(!exists('rr_risk_df')) {
    rr_risk_df <- raster(file.path(dir_git, 'output', 'mean_rr_risk_raster_010deg.tif')) %>%
      rasterToPoints() %>%
      as.data.frame() %>%
      setNames(c('x', 'y', 'rr_risk'))
  }
  
  x <- ggplot(rr_risk_df) +
    ggtheme_map() +
    geom_raster(aes(x, y, fill = rr_risk)) +
    geom_sf(data = land_poly, aes(geometry = geometry), fill = 'grey96', color = 'grey40', size = .10) +
    geom_point(data = groups_latlong, aes(x, y), 
               shape = 21, color = 'white', fill = 'white', size = 2.5) +
    geom_point(data = groups_latlong, aes(x, y), 
               shape = 21, color = 'blue', fill = 'yellow', size = 2) +
    scale_fill_gradientn(colors = c('green4', 'lightyellow', 'red2', 'red3', 'red4', 'purple4'),
                         limits = c(0, 1),
                         labels = c('LC', 'NT', 'VU', 'EN', 'CR', 'EX'),
                         breaks = c( 0.0,  0.2,  0.4,  0.6,  0.8,  1.0)) +
    scale_x_continuous(expand = c(0, 0)) +
    scale_y_continuous(expand = c(0, 0)) +
    labs(title = 'Global areas of interest',
         fill  = 'risk')
  
  ggsave(plot = x, file = fig_file,
         width = 6, height = 4, units = 'in', dpi = 300)
}

knitr::include_graphics(path.expand(fig_file))

1.3.1 Region maps with spp data

cell_gps <- unique(spp_cells_info$cell_group)

spp_cell_gps <- vector('list', length = length(cell_gps)) %>%
  setNames(cell_gps)
fig_files <- vector('list', length = length(cell_gps)) %>%
  setNames(cell_gps)

for(cell_gp in cell_gps) {
  ### cell_gp <- cell_gps[1]
  
  fig_file <- file.path(dir_git, 'data_explore/figs',
                      sprintf('rgn_map_%s.png', 
                              tolower(cell_gp) %>% str_replace_all('[^a-z0-9]', '_')))

  message(sprintf('Mapping cells for area of interest: %s\n', cell_gp))

  ### Attach lat/long values to cell IDs
  cells_latlong <- spp_cells_info %>% 
    filter(cell_group == cell_gp) %>%
    select(cell_id, depth) %>% 
    distinct() %>%
    left_join(cell_id_df, by = 'cell_id') %>%
    arrange(cell_id) %>%
    mutate(cell_name = toupper(letters[1:n()]))
  
  spp_cell_gps[[cell_gp]] <- spp_cells_info %>%
    filter(cell_group == cell_gp) %>%
    left_join(cells_latlong, by = c('cell_id', 'depth')) %>%
    select(-x, -y, -cell_group, -cell_id, -depth) %>%
    mutate(present = TRUE) %>%
    spread(cell_name, present, fill = FALSE)

  if(!file.exists(fig_file)) {
    ### create the image and save it
    
    if(!exists('rr_risk_df')) {
      rr_risk_df <- raster(file.path(dir_git, 'output', 'mean_rr_risk_raster_010deg.tif')) %>%
        rasterToPoints() %>%
        as.data.frame() %>%
        setNames(c('x', 'y', 'rr_risk'))
    }
    
    ### define bounding box with some margin around it:
    bbox <- c(xmin = max(min(cells_latlong$x) - 4, -179.95),
              xmax = min(max(cells_latlong$x) + 4,  179.95),
              ymin = max(min(cells_latlong$y) - 2,  -89.95),
              ymax = min(max(cells_latlong$y) + 2,   89.95))
    
    depth_label <- sprintf('Depths: %s', 
                           paste0(cells_latlong$cell_name, ': ', 
                                  cells_latlong$depth, ' m', 
                                  collapse = '; '))
      
    x <- ggplot(rr_risk_df) +
      ggtheme_map() +
      geom_raster(aes(x, y, fill = rr_risk)) +
      geom_sf(data = land_poly, aes(geometry = geometry), fill = 'grey96', color = 'grey40', size = .10) +
      geom_point(data = cells_latlong, aes(x, y), 
                 shape = 21, color = 'blue', fill = 'yellow', size = 2) +
      geom_text(data = cells_latlong, aes(x, y, label = cell_name),
                nudge_x = .15, nudge_y = .15) +
      scale_fill_gradientn(colors = c('green4', 'lightyellow', 'red2', 'red3', 'red4', 'purple4'),
                           limits = c(0, 1),
                           labels = c('LC', 'NT', 'VU', 'EN', 'CR', 'EX'),
                           breaks = c( 0.0,  0.2,  0.4,  0.6,  0.8,  1.0)) +
      annotate('text', x = bbox[1] + .05, y = bbox[3] + .05,
               label = depth_label, hjust = 0, vjust = 0) +
      coord_sf(xlim = c(bbox[1], bbox[2]),  ylim = c(bbox[3], bbox[4])) +
      scale_x_continuous(expand = c(0, 0)) +
      scale_y_continuous(expand = c(0, 0)) +
      labs(title = cell_gp,
           fill  = 'mean risk')
    
    ggsave(plot = x, file = fig_file,
           width = 6, height = 4, units = 'in', dpi = 300)
  }
  
  fig_files[[cell_gp]] <- fig_file

}

cell_gp <- cell_gps[1]

knitr::include_graphics(path.expand(fig_files[[cell_gp]]))

DT::datatable(spp_cell_gps[[cell_gp]], caption = cell_gp)

cell_gp <- cell_gps[2]

knitr::include_graphics(path.expand(fig_files[[cell_gp]]))

DT::datatable(spp_cell_gps[[cell_gp]], caption = cell_gp)

cell_gp <- cell_gps[3]

knitr::include_graphics(path.expand(fig_files[[cell_gp]]))

DT::datatable(spp_cell_gps[[cell_gp]], caption = cell_gp)

cell_gp <- cell_gps[4]

knitr::include_graphics(path.expand(fig_files[[cell_gp]]))

DT::datatable(spp_cell_gps[[cell_gp]], caption = cell_gp)

cell_gp <- cell_gps[5]

knitr::include_graphics(path.expand(fig_files[[cell_gp]]))

DT::datatable(spp_cell_gps[[cell_gp]], caption = cell_gp)

cell_gp <- cell_gps[6]

knitr::include_graphics(path.expand(fig_files[[cell_gp]]))

DT::datatable(spp_cell_gps[[cell_gp]], caption = cell_gp)

cell_gp <- cell_gps[7]

knitr::include_graphics(path.expand(fig_files[[cell_gp]]))

DT::datatable(spp_cell_gps[[cell_gp]], caption = cell_gp)

cell_gp <- cell_gps[8]

knitr::include_graphics(path.expand(fig_files[[cell_gp]]))

DT::datatable(spp_cell_gps[[cell_gp]], caption = cell_gp)

cell_gp <- cell_gps[9]

knitr::include_graphics(path.expand(fig_files[[cell_gp]]))

DT::datatable(spp_cell_gps[[cell_gp]], caption = cell_gp)

cell_gp <- cell_gps[10]

knitr::include_graphics(path.expand(fig_files[[cell_gp]]))

DT::datatable(spp_cell_gps[[cell_gp]], caption = cell_gp)

cell_gp <- cell_gps[11]

knitr::include_graphics(path.expand(fig_files[[cell_gp]]))

DT::datatable(spp_cell_gps[[cell_gp]], caption = cell_gp)

cell_gp <- cell_gps[12]

knitr::include_graphics(path.expand(fig_files[[cell_gp]]))

DT::datatable(spp_cell_gps[[cell_gp]], caption = cell_gp)

cell_gp <- cell_gps[13]

knitr::include_graphics(path.expand(fig_files[[cell_gp]]))

DT::datatable(spp_cell_gps[[cell_gp]], caption = cell_gp)

cell_gp <- cell_gps[14]

knitr::include_graphics(path.expand(fig_files[[cell_gp]]))

DT::datatable(spp_cell_gps[[cell_gp]], caption = cell_gp)

cell_gp <- cell_gps[15]

knitr::include_graphics(path.expand(fig_files[[cell_gp]]))

DT::datatable(spp_cell_gps[[cell_gp]], caption = cell_gp)

cell_gp <- cell_gps[16]

knitr::include_graphics(path.expand(fig_files[[cell_gp]]))

DT::datatable(spp_cell_gps[[cell_gp]], caption = cell_gp)

cell_gp <- cell_gps[17]

knitr::include_graphics(path.expand(fig_files[[cell_gp]]))

DT::datatable(spp_cell_gps[[cell_gp]], caption = cell_gp)

cell_gp <- cell_gps[18]

knitr::include_graphics(path.expand(fig_files[[cell_gp]]))

DT::datatable(spp_cell_gps[[cell_gp]], caption = cell_gp)